#!/usr/bin/env python3
from extract import extract_li, extract_table, extract
import req as requests
import html

def fetch_rank_page(url):
    """Fetch one ranking page, print a line per table row, return the row count.

    The page at *url* is downloaded (60 second timeout), decoded as UTF-8 with
    undecodable bytes dropped, and handed to ``extract_table``.  Each row is
    unpacked into exactly six cells; for every row the year, worldwide total,
    trimmed link and HTML-unescaped title are printed, space separated.

    NOTE(review): ``extract_table`` and ``extract`` come from the project's
    ``extract`` module and are not visible here; this function assumes the
    former yields a sized collection of six-string rows and the latter
    returns the text found between two delimiters -- confirm against that
    module.

    Returns:
        ``len()`` of whatever ``extract_table`` returned for the page.
    """
    response = requests.get(url, timeout=60)
    page_text = response.content.decode('utf-8', 'ignore')
    rows = extract_table("<table>", page_text)

    for rank_cell, year_cell, link_cell, total_cell, _usa, _no_usa in rows:
        # The rank is not printed, but converting it still rejects a row
        # whose first cell is not a (comma-grouped) integer.
        int(rank_cell.replace(',', ''))
        release_year = int(extract('>', '<', year_cell))
        raw_title = extract('">', '<', link_cell)

        # Take the first quoted value in the link cell, cut any "#fragment",
        # then drop the first seven characters (presumably a "/movie/"
        # prefix -- TODO confirm against the live markup).
        href = extract('"', '"', link_cell)
        without_fragment = href.partition("#")[0]
        movie_path = without_fragment[7:]

        # Strip the currency sign and thousands separators before parsing.
        gross = int(total_cell.replace("$", "").replace(',', ''))
        movie_title = html.unescape(raw_title)

        print(release_year, gross, movie_path, movie_title)

    return len(rows)

def main():
    """Walk the all-time worldwide ranking pages until a short page is seen.

    Page URLs end in ``01``, ``101``, ``201``, ... (the page index times ten
    followed by a literal ``1``).  Each page is passed to ``fetch_rank_page``;
    iteration stops after the first page that reports anything other than
    100 rows.
    """
    full_page_rows = 100
    page_index = 0
    row_count = full_page_rows
    while row_count == full_page_rows:
        page_url = f"https://www.the-numbers.com/box-office-records/worldwide/all-movies/cumulative/all-time/{page_index*10}1"
        page_index += 1
        row_count = fetch_rank_page(page_url)

# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

